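/**********************************************************************
gc_7_reduced_form_men.do

Reduced-form OLS regressions of the log outcome (ln_y) on accumulated
experience, estimated on observations whose action code is 30.
Data source, gender, start age, occupation definition and firm class
definition are selected in the GATES section below.
**********************************************************************/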
**********
* SET UP *
**********
* start from an empty session: clear all also drops previously defined programs, *
* which is what allows starttime and endtime to be defined again further down *
clear all
set matsize 2000
* NOTE(review): the documented form is set more on or off; presumably 1 is meant *
* to switch the more pause off so the run is not interrupted - confirm *
set more 1

* location for dofiles *
* the working directory is changed first and then stored in the global dir, so the *
* second cd below moves to the directory Stata is already in *
cd "T:\_Projet_4915\dofiles"
global dir "`c(pwd)'"
cd $dir

* point the PERSONAL ado directory at the project ado folder *
* NOTE(review): presumably this is where the user-written estout and estadd commands *
* used in STEP 1 are installed - confirm *
sysdir set PERSONAL "T:\_Projet_4915\ado"

*********
* GATES *
*********
* Switches that select the input data and decide which steps are executed. *
* ext and gender are pasted into the log, input and output file names. *
* start_age, occ_version and class_def are looped over in STEP 1 with foreach, so *
* each of them may hold several values separated by blanks. *

* Specify which data you want to work with (synthetic = syn, real = rl) *
local ext = "rl"

* Specify which gender to process (men = men, women = women) *
local gender = "men"

* Specify first observed age (24 25 26) *
global start_age "25"

* Specify occupational definition to use (occ_version 1 2)
global occ_version "2"

* Specify firm class definition (1 2 3 4 5 6)
global class_def "6"

* STEP 1: prepare dataset and run OLS regressions, entrepreneurs *
* (1 = run the step, any other number = skip it) *
local gate1 = 1

* STEP X: delete all intermediate datasets *
* (1 = run the step, any other number = skip it) *
local gateX = 1

* start log file *
* close any log left open by an earlier run (capture ignores the error when none is open), *
* then open a text log named after gender and data source; the name has no folder, *
* so the log is written to the current working directory *
quietly capture log close
quietly log using gc_7_reduced_form_`gender'_`ext', text replace

* specify file locations *
* folder names relative to drive T:, each starting with a backslash *
* project_folder and temp are not referenced anywhere else in this do-file *
global project_folder "\_Projet_4915"
global data_folder "\_Projet_4915\DATA"
global output_folder "\_Projet_4915\ResultsFolder"
global temp "temp"

* NOTE(review): in Stata a backslash placed directly before a dollar sign affects how *
* the global is expanded; presumably this is why the folder globals carry their own *
* leading backslash - confirm before changing any of these path expressions *
* datadir is assigned again in STEP 1 and STEP X before it is used *
local datadir T:\${data_folder}\

*******************************************
* MAKE A GLOBAL VARIABLE FOR TODAY'S DATE *
*******************************************
* Build the global date as an 8-character YYYYMMDD stamp of the day the do-file runs. *
* It is used in STEP 1 as part of the name of the estout results file. *
*
* S_DATE holds the current date as text in day, month name, year order. date() with *
* the DMY mask turns it into a Stata daily date, and the display format CCYYNNDD *
* prints century, year, two-digit month and two-digit day, so no manual padding of *
* the day or month is needed. trim() removes blanks around the text, which the *
* previous substring-based code also did for the day part. *
global date = string(date(trim("$S_DATE"), "DMY"), "%tdCCYYNNDD")

****************************************
* DEFINE LITTLE PROGRAMS TO PRINT TIME *
****************************************
* starttime: write one line to the results window and log with the current *
* clock time and date, marking the moment a step begins *
program starttime
	display "Started processing at $S_TIME on $S_DATE"
end

* endtime: write one line to the results window and log with the current *
* clock time and date, marking the moment a step has finished *
program endtime
	display "Finished processing at $S_TIME on $S_DATE"
end

************************
* START OF THE PROGRAM *
************************

****************************************
* STEP 1: prepare dataset for analysis *
****************************************
* STEP 1, executed only when gate1 equals 1. *
* For every combination of start age (a), occupation definition (v) and firm class *
* definition (d) the step *
*   1. loads gc_analyze_me_<gender>_start<a>_occ_v<v>_<ext>.dta from the data folder, *
*   2. builds a two-digit action code (occupation digit followed by firm class digit), *
*   3. counts, for each action code, how often it occurred in the previous 11 records *
*      of the same pid (records are ordered by tax_yr), *
*   4. keeps the observations with action 30 and runs two OLS regressions of ln_y on *
*      experience terms with standard errors clustered on the person, *
*   5. writes the estimates with estout and saves the estimation sample with saveold. *
* Requires the programs starttime and endtime and the user-written estout and estadd. *
disp "***** Started processing STEP 1 *****"
starttime
if `gate1' == 1 {
	disp "***** STEP 1: prepare dataset for analysis *****"

	* folders used by this step; the path expressions are unchanged, they only have *
	* one local each instead of sharing a single local that was reassigned *
	local in_dir T:\${data_folder}\
	local out_dir T:\${output_folder}\
	local esample_dir T:\${data_folder}\gc_esamples\

	* variables of the input file that are dropped right after loading *
	local unused_vars ///
		eid_Ind eid_all eid_startup sex dob_yr t1h_marst inimdb ///
		res_pc res_prov res_cma_name res_cma_code res_city_size_3c res_city_size_4c res_city_size_5c res_city_size_6c res_pc_inc res_pc_pop res_pc_busipop res_pc_shr_bus ///
		t1h_earn_t4 t1h_tot_inc_calc emp_inc tot_emp_inc t1h_bus_inc_gross t1h_bus_inc_net tot_busi_t4 tot_busi_t4_startups tot_busi_3680 tot_busi_3680_startups y_0 y_1 y_2 y_3_all y_3_startups ///
		naics4_int naics2_int naics3_int entry_yr exit_yr res_area_m

	* source variable of the firm class for class_def 1 to 6, in that order *
	local class_vars f1_employment_4c f2_employment_4c f1_rev_perL_4c f2_rev_perL_4c f1_payroll_perL_4c f2_payroll_perL_4c

	foreach a of global start_age {
		foreach v of global occ_version {
			foreach d of global class_def {
				use "`in_dir'gc_analyze_me_`gender'_start`a'_occ_v`v'_`ext'.dta", clear
				drop `unused_vars'

				*****************************************************************
				* construct a two digit categorical variable to denote action *
				*****************************************************************
				* first digit denotes occupation *
				* 1 = paid employee, 2 = self-employed, 3 = entrepreneur, 9 = unemployed *
				* (occupation value 0 is recoded to 9) *
				tostring occ_v`v', generate(first_digit)
				replace first_digit = "9" if occ_v`v' == 0

				* second digit denotes firm class *
				* pick the source variable that belongs to this class definition; for a *
				* value outside 1 to 6 no variable is picked and firm_class stays 0, *
				* exactly as when none of the former six branches matched *
				local class_var ""
				if inlist(`d', 1, 2, 3, 4, 5, 6) {
					local class_var : word `d' of `class_vars'
				}
				gen firm_class = 0
				if "`class_var'" != "" {
					* source categories 3 and 4 are merged into firm class 3 *
					replace firm_class = 1 if `class_var' == 1
					replace firm_class = 2 if `class_var' == 2
					replace firm_class = 3 if `class_var' == 3 | `class_var' == 4
				}

				* the firm class digit is kept only for occupation 1; it is set to 0 *
				* for occupations 0, 2 and 3 *
				tostring firm_class, generate(second_digit)
				replace second_digit = "0" if inlist(occ_v`v', 0, 2, 3)
				gen action = first_digit + second_digit
				destring action, replace
				drop second_digit

				*****************************************************************************
				* create categorical variable to denote action over the past 11 periods *
				*****************************************************************************
				* period numbers the records of each pid in tax_yr order; lag i is filled *
				* only from record i + 1 onwards and is missing before that *
				* NOTE(review): the lag is taken over records, not calendar years, so it *
				* presumably relies on one record per pid and year without gaps - confirm *
				capture drop period
				sort pid tax_yr
				by pid: gen period = _n
				tab period
				forvalues i = 1/11 {
					by pid: gen L`i'action = action[_n - `i'] if period > `i'
				}
				drop period

				***********************************
				* construct vectors of experience *
				***********************************
				* exper_<code> = number of the 11 lags in which the action equals <code>; *
				* a missing lag never matches and therefore counts as 0 *
				foreach act in 11 12 13 20 30 90 {
					forvalues i = 1/11 {
						gen flag`i' = (L`i'action == `act')
					}
					egen exper_`act' = rowtotal(flag*)
					drop flag*
				}
				* NOTE(review): this drops every variable whose name starts with a capital L, *
				* not only the lag variables created above - confirm that is intended *
				drop L*

				* construct main outcome variable *
				gen ln_y = log(y_v`v')

				* construct numeric person ID *
				destring pid, gen(pid_int)

				* construct aggregate experience variables *
				gen exper_10 = exper_11 + exper_12 + exper_13
				gen exper_other = exper_11 + exper_12 + exper_13 + exper_20

				* construct polynomials of experience *
				foreach var in exper_10 exper_other exper_11 exper_12 exper_13 exper_20 exper_30 exper_90 {
					gen `var'_sq = `var' * `var'
				}
				* age is measured relative to the start age of this run *
				replace age = age - `a'
				gen age_sq = age * age
				gen age_cu = age * age_sq

				************
				* CLEAN UP *
				************
				sort pid tax_yr

				* from here on the data hold only observations with action 30 *
				keep if action == 30

				* regressor lists; only coeffs1 and coeffs3 are estimated below, coeffs2 *
				* and coeffs4 (the versions with interactions) are defined but not used *
				local coeffs1 "exper_10 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq"
				local coeffs2 "exper_10 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq c.exper_10#c.exper_30 c.exper_20#c.exper_30"
				local coeffs3 "exper_11 exper_12 exper_13 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq"
				local coeffs4 "exper_11 exper_12 exper_13 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq c.exper_11#c.exper_30 c.exper_12#c.exper_30 c.exper_13#c.exper_30 c.exper_20#c.exper_30"

				estimates clear

				* one pass per estimated specification, stored as m1_ols and m3_ols; *
				* no if qualifier is needed because of the keep above *
				foreach spec in 1 3 {
					reg ln_y `coeffs`spec'', cluster(pid_int)
					estimates store m`spec'_ols
					estadd ysumm
					estadd scalar rN_clust = `e(N_clust)'
					estadd scalar rN = `e(N)'
				}

				* write all stored models (m1_ols, m3_ols) to one dated text file *
				disp "`out_dir'"
				estout m* using "`out_dir'gc_${date}_OLS_`gender'_start`a'_occ_v`v'_def`d'_`ext'.txt", replace ///
				varwidth(12) cells(b(star fmt(%9.3f)) ///
				se(par(`"="("' `")""') fmt(%9.3f))) ///
				starlevels(* 0.1 ** 0.05 *** 0.01) ///
				stats(ymean r2 r2_a rN_clust rN, ///
				fmt(%9.2f %9.2f %9.2f %9.0g %9.0g) ///
				labels("Mean, Dependent Variable" "R-squared" "adjusted R-squared" "Number of individuals" "Number of obs."))

				* save intermediate dataset to the data_folder *
				saveold "`esample_dir'gc_OLS_`gender'_start`a'_occ_v`v'_def`d'_`ext'.dta", replace v(12)
			}
		}
	}
	clear
	disp "***** STEP 1: prepare dataset for analysis (COMPLETED) *****"
}
disp "***** Finished processing STEP 1 *****"
endtime


********************************************
* STEP X: delete all intermediate datasets *
********************************************
* STEP X, executed only when gateX equals 1. *
* The start and finish messages and the two time stamps are printed in any case. *
disp "***** Started processing STEP X *****"
starttime
if 1 == `gateX' ///
{
	disp "***** STEP X: delete all intermediate datasets *****"
	* NOTE(review): the step only sets datadir and prints its messages; no file is *
	* erased here, so the datasets saved by STEP 1 remain on disk - confirm whether *
	* erase commands are still to be added *
	local datadir T:\${data_folder}\
	
	disp "***** STEP X: delete all intermediate datasets (COMPLETED) *****"
}
disp "***** Finished processing STEP X *****"
endtime

********
* EXIT *
********
* empty the session (data, stored estimates, the programs starttime and endtime) *
clear all
* close the log opened at the top of the do-file; this stops with an error if no log is open *
log close
